/**********************************************************************/
/* Creates the analyses and data for Appendix regressions and figures */
/**********************************************************************/

* ---- Session setup ----
* Disable the -more- pager so the script runs without pausing.
set more off

* Start from an empty session: data, then Mata objects, then matrices.
clear
clear mata
clear matrix

* Raise the maximum number of variables (issued after the data are cleared).
set maxvar 30000

* Use Times New Roman for text in graph windows.
graph set window fontface "Times New Roman"

/********************************************/
/* Appendix A.1: Analysis of 50% answers    */
/********************************************/

* Load the base file; the folder is taken from the global macro savedata.
clear
use "$savedata\wave1-7base.dta"

** Of all people, distribution of number of other 50% answers given

preserve

* Take one off num50 for respondents who answered 50 to exlo80, so that the
* count refers to the OTHER questions only.
* NOTE(review): presumably num50 (built upstream) counts exlo80 as well - confirm.
replace num50 = num50 - 1 if exlo80 == 50

* Keep valid exlo80 answers only. Stata orders missing values above every
* number, so "exlo80 >= 0" on its own would also keep missing exlo80;
* exclude missing explicitly, as the other sections of this file do.
keep if exlo80 >= 0 & !missing(exlo80)

* One indicator per possible count (0 to 5); the means of the indicators
* are the shares of respondents with each count.
forvalues x = 0/5 {
	gen num50_`x' = num50 == `x'
}

collapse (mean) num50_*
export excel "$output\Appendix.xls", firstrow(variables) sheetreplace sheet("num 50s")
restore


** Of those who answered 50% to exlo80, distribution of number of other 50% answers given

preserve

* Same adjustment as above: do not count the exlo80 answer itself.
replace num50 = num50 - 1 if exlo80 == 50

* Restrict to respondents whose exlo80 answer was exactly 50.
keep if exlo80 == 50

forvalues x = 0/5 {
	gen num50_`x' = num50 == `x'
}

collapse (mean) num50_*

export excel "$output\Appendix.xls", firstrow(variables) sheetreplace sheet("num 50s if said 50")

restore


/**********************************************************************************/
/* Appendix A.2 Analysis correlation between expectations and known risk factors  */
/**********************************************************************************/

/* Regress self-reported LE on age dummies and sex, and on risk factors */
* Fresh copy of the base file for the risk-factor regressions.
clear
use "$savedata\wave1-7base.dta"

* Sample: selected finstat codes, a valid exlo80 answer, and exlo80 strictly
* above exlo90 whenever exlo90 is itself a valid 0-100 answer.
keep if inlist(finstat,1,7,14,25,33)
drop if missing(exlo80) | exlo80 < 0
drop if inrange(exlo90,0,100) & exlo80 <= exlo90

* Code -8 on self-reported health becomes its own category (99).
replace srh_hrs = 99 if srh_hrs == -8

* Negative codes on the diagnosis indicators are set to missing.
local conditions "everbp heart everan everst everdi everas everlu everca everar everos everad everde everpd everps"
recode `conditions' (-9/-1 = .)

* Two covariate sets: riskfactors2 is riskfactors1 plus self-reported health.
* Both locals are kept because later code in this file refers to them.
local riskfactors1 "couple ib1.schleave ib3.incomeq ib3.wealthq working ib0.smokerstat ib3.alcohol i.everbp i.heart i.everan i.everst i.everdi i.everas i.everlu i.everca i.everar i.everos i.everad i.everde i.everpd i.everps c.irecall c.drecall ib3.agepadied ib3.agemadied nonwhite"
local riskfactors2 "`riskfactors1' ib3.srh_hrs"

* Separate regressions for men (sex==1) and women (sex==2), each with and
* without self-reported health; standard errors clustered on idauniq.
foreach grp in male female {
	local s = cond("`grp'" == "male", 1, 2)
	eststo `grp'_SRH : reg exlo80 i.age#i.wave `riskfactors2' if sex==`s', cluster(idauniq)
	eststo `grp'_noSRH : reg exlo80 i.age#i.wave `riskfactors1' if sex==`s', cluster(idauniq)
}

* Value and variable labels used when the table is written.
label define everca 0 "No Cancer" 1 "Cancer"
label define heart 0 "No" 1 "Heart condition"
label define everbp 0 "No" 1 "Hypertension"
label define everst 0 "No" 1 "Stroke"
foreach v in everca heart everbp everst {
	label values `v' `v'
}
label variable exlo80 "exlo80"

* Coefficients reported in the table.
local keep_coeffs "1.smokerstat 2.smokerstat  3.smokerstat 4.smokerstat 1.alcohol 2.alcohol  4.alcohol  5.alcohol 1.agemadied 2.agemadied 3.agemadied 4.agemadied 5.agemadied 6.agemadied 7.agemadied  8.agemadied 1.agepadied 2.agepadied 3.agepadied 4.agepadied 5.agepadied 6.agepadied 7.agepadied  8.agepadied 1.everca 1.heart 1.everbp 1.everst"

esttab male_noSRH female_noSRH male_SRH female_SRH using "$output\Table2.tex",  keep(`keep_coeffs') replace not b(1) label

/**************************************************************************/
/* 	Appendix A.2: See if news relates to revisions in expectations		  */
/**************************************************************************/

* Fresh copy of the base file, reduced to the variables needed here.
clear
use "$savedata\wave1-7base.dta"

keep idauniq wave exlo80 exlo90 new* age cohort5 dob dobyear sex finstat

* Sample: non-negative birth year, a valid exlo80 answer, and exlo80
* strictly above exlo90 whenever exlo90 is a valid 0-100 answer.
drop if dobyear < 0
drop if missing(exlo80) | exlo80 < 0
drop if inrange(exlo90,0,100) & exlo80 <= exlo90

* Remaining negative codes on the two outcomes become missing.
recode exlo80 exlo90 (-9/-1 = .)

* Negative codes on the new-diagnosis indicators become category 99.
local news "newlu newas newar newos newca newpd newps newad newde newbl newbp newan newmi newhf newhm newhr newdi newst newch"
recode `news' (-9/-1 = 99)

* Factor-variable version of the same list, in the same order.
local news_fv ""
foreach v of local news {
	local news_fv "`news_fv' i.`v'"
}

* fixed effects regression controlling for age *
xtset idauniq wave

* rev1 uses exlo80 as the outcome, rev2 uses exlo90.
local m = 0
foreach y in exlo80 exlo90 {
	local ++m
	eststo rev`m': xtreg `y' `news_fv' age, fe cluster(idauniq)
}

* Value labels for the conditions shown in the table: the 0 and 99 entries
* are shared, the entry for 1 names the condition.
foreach v in newad newca newde newmi newlu newpd newps newst {
	label define `v' 0 "None" 99 "Missing"
	label values `v' `v'
}
label define newad 1 "Alzheimer's Disease", add
label define newca 1 "Cancer", add
label define newde 1 "Dementia", add
label define newmi 1 "Heart Attack", add
label define newlu 1 "Lung Disease", add
label define newpd 1 "Parkinson's Disease", add
label define newps 1 "Psychiatric problems", add
label define newst 1 "Stroke", add

* The same coefficient list is used to select and to order the rows.
local shown "1.newad 1.newca 1.newde 1.newmi 1.newlu 1.newpd 1.newps 1.newst"
esttab rev1 rev2 using "$output\Table3.tex",  keep(`shown') order(`shown') replace not b(1) label noomitted

/************************************************************************/
/* 		Appendix A.2: How does P75 correlate with actual survival?	    */
/************************************************************************/

* Fresh copy of the base file for the comparison of reported and actual mortality.
clear
use "$savedata\wave1-7base.dta"

* Sample: exlo80 at most 100, exlo90 at most 100 or missing, and exlo90
* strictly below exlo80 when exlo90 is not missing.
* NOTE(review): negative exlo80 codes pass this filter and are set to missing
* just below, so they stay in the data with missing probdie - confirm intended.
gen     sample = exlo80<=100 & (exlo90<=100 | exlo90 == .)
replace sample = 0 if exlo90>=exlo80 & exlo90 !=.
keep if sample == 1

recode exlo80 -9/-1 =.
recode exlo90 -9/-1 =.
recode srh_hrs -8 = 99

* The data were reloaded above, so the diagnosis indicators carry their
* original negative codes again. Set them to missing, as in the risk-factor
* regressions earlier in this file, before they enter as i. terms below.
foreach var in everbp heart everan everst everdi everas everlu everca everar everos everad everde everpd everps {
	recode `var' -9/-1 = .
}

* Define the control sets here so that this section can be run on its own;
* if the locals were empty the "controls" models would silently be
* estimated without any controls. Contents match the definitions used for
* the risk-factor regressions.
local riskfactors1 "couple ib1.schleave ib3.incomeq ib3.wealthq working ib0.smokerstat ib3.alcohol i.everbp i.heart i.everan i.everst i.everdi i.everas i.everlu i.everca i.everar i.everos i.everad i.everde i.everpd i.everps c.irecall c.drecall ib3.agepadied ib3.agemadied nonwhite"
local riskfactors2 "`riskfactors1' ib3.srh_hrs"

* Reported probability (in %) of dying, as the complement of exlo80.
gen probdie = 100-exlo80 if exlo80>=0 & exlo80<.

* Ten-point bands: 0-9 is band 1, ..., 90-99 is band 10, and 100 is band 11.
* The small constant pushes exact multiples of 10 into the next band.
gen probdie_cat = ceil(probdie/10+0.000000001)
label define die_cat 1 "0-9" 2 "10-19" 3 "20-29" 4 "30-39" 5 "40-49" 6 "50-59" 7 "60-69" 8 "70-79" 9 "80-89" 10 "90-99" 11 "100"
label values probdie_cat die_cat

* Make figure *
* Collapse to one weighted "All" row (category 999) and append the
* per-category rows stored in subsequent_death.dta in the temp folder.
preserve
keep if dead10yr !=.
collapse (mean) dead10yr exlo80 [pw=wgt]
replace exlo80 = (100-exlo80)/100
gen probdie_cat = 999
append using "$temp\subsequent_death.dta"
graph bar dead10yr , over(probdie_cat, relabel(1 "0-9" 2 "10-19" 3 "20-29" 4 "30-39" 5 "40-49" 6 "50-59" 7 "60-69" 8 "70-79" 9 "80-89" 10 "90-99" 11 "100" 12 "All")) legend(off) graphregion(color(white)) yscale(range(0 0.6)) ylabel(0 "0%" .1 "10%" 0.2 "20%" .3 "30%" 0.4 "40%" .5 "50%" 0.6 "60%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vthin) nogmin) ytitle("Mortality rate") b1title("Reported mortality probability (%)") aspect(0.5) ysize(3) bar(1, color(gs5)) graphregion(margin(2 2 2 2))
graph export "$output\Fig8.eps", replace
restore

* test statistical significance - 10yr *
preserve
* The band variable created above is probdie_cat; no probdie_cat2 is
* created anywhere in this file.
drop if probdie_cat ==.
keep if dead10yr !=.
probit dead10yr probdie i.age i.sex
outreg2 using "$output\subsequent_death_tests_probit.xls", keep(probdie) pvalue replace
reg dead10yr probdie i.age i.sex
* First (and only) write to this file, so replace it.
* NOTE(review): outreg2 is understood to append when neither replace nor
* append is given, which would add a column on every re-run - confirm.
outreg2 using "$output\subsequent_death_tests.xls", keep(probdie) pvalue replace
restore

* test statistical significance - 10yr - controlling for risk factors *
preserve
drop if probdie_cat ==.
keep if dead10yr !=.
probit dead10yr probdie i.age i.sex `riskfactors1'
outreg2 using "$output\subsequent_death_tests_controls.xls", keep(probdie) pvalue replace
probit dead10yr probdie i.age i.sex `riskfactors2'
* Second column of the same file: append explicitly and report p-values,
* matching the first column.
outreg2 using "$output\subsequent_death_tests_controls.xls", keep(probdie) pvalue append
restore

/************************************************************************/
/* 		Appendix A.2: How does P75 correlate with life insurance 	    */
/************************************************************************/

* Fresh copy of the base file for the life-insurance analysis.
clear
use  "$savedata\wave1-7base.dta"
keep if elsa == 1
* using waves 3 to 5 to align sample with model *
keep if inrange(wave,3,5)

* merge in pension wealth variables *
* One file per wave (globals penv3, penv4, penv5). After each merge the
* value is copied into pension_wlth for that wave only.
* NOTE(review): with update replace, a missing value in the later file
* leaves the earlier wave's pripenw_spa in place - confirm this is intended.
gen pension_wlth = .
merge m:1 idauniq using "$penv3", keepusing(pripenw_spa)
drop if _merge == 2
replace pension_wlth = pripenw_spa if wave == 3
drop _merge
merge m:1 idauniq using "$penv4", keepusing(pripenw_spa) update replace
drop if _merge == 2
replace pension_wlth = pripenw_spa if wave == 4
drop _merge
merge m:1 idauniq using "$penv5", keepusing(pripenw_spa) update replace
drop if _merge == 2
replace pension_wlth = pripenw_spa if wave == 5
drop _merge
drop pripenw_spa

* merge in scaled objective survival probabilities *
replace dobyear = year(dob) if dobyear < 0 // those born before 1912
* Drop every existing variable whose name starts with prob before merging.
drop prob*
merge m:1 dobyear sex age using "$temp\prob_survive_scaled.dta"
assert _merge != 1 if age != . & dobyear >= 1920
drop if _merge == 2
drop _merge

* merge in whether in sample for main results *
merge 1:1 idauniq wave using "$savedata\ids_firstonly.dta"
assert _merge != 2
gen in_main_results = _merge == 3
drop _merge

* clean value of LI payout variable *
rename ialiya LI_payout
recode LI_payout -9/-8 = .
* recode not applicable as zero *
recode LI_payout -1 = 0
* create dummy for said has some LI: some people report the amount that will be received by others when THEY die
* even though haven't answered previous question about whether they or partner has life insurance. so use either. *
gen has_LI = inlist(ialiw,1,3) | (LI_payout > 0 & LI_payout != .)
* if said that have LI but amount coded as zero then set amount to missing *
replace LI_payout = . if has_LI == 1 & LI_payout == 0
replace LI_payout = 0 if has_LI == 0

* clean whether has a child *
recode scchd -9/-1 = 99

* clean partner variables *
* A partner is present when the partner id is neither the -1 code nor missing.
gen has_partner = !inlist(idauniq_p, -1, .)
recode age_p -9/-1 = 999
recode sex_p -9/-1 = 999
recode exlo80 -9/-1 = .
recode exlo90 -9/-1 = .
* Rescale the reported probabilities from percent to the 0-1 range.
replace exlo80 = exlo80/100
replace exlo90 = exlo90/100

* get probability that die in next 5, 10 and 15 years *
* For horizon h, one minus the merged variable prob(age + h).
foreach h in 5 10 15 {
	gen die_`h' = .
	forvalues a = 50/110 {
		replace die_`h' = 1 - prob`a' if age + `h' == `a'
	}
}

* top-code own age at 90 and bound partner age to 50-70 to deal with small cells *
* Missing age compares as larger than any number, so exclude it explicitly;
* otherwise missing ages would be overwritten with 90.
replace age = 90 if age > 90 & age < .
replace age_p = 50 if age_p < 50
replace age_p = 70 if age_p > 70 & age_p < 999

* 3 obs with missing partner age but non-missing sex. set sex to missing *
replace sex_p = 999 if age_p == 999

* create LI as a share of wealth: inc housing *
* Halve grosshw_bu_s only for respondents with a partner. The original
* test (idauniq_p not missing) would also halve it for respondents coded
* -1, which has_partner treats as having no partner.
replace grosshw_bu_s = grosshw_bu_s/2 if has_partner == 1
gen E_LI_payout = LI_payout * die_10
gen tot_wlth = grosshw_bu_s+pension_wlth+E_LI_payout
gen LI_share = E_LI_payout/(tot_wlth)

* create equivalent vars for sensitivity for 5 and 15 year terms *

gen E_LI_payout5 = LI_payout * die_5
gen tot_wlth5 = grosshw_bu_s+pension_wlth+E_LI_payout5
gen LI_share5 = E_LI_payout5/(tot_wlth5)

gen E_LI_payout15 = LI_payout * die_15
gen tot_wlth15 = grosshw_bu_s+pension_wlth+E_LI_payout15
gen LI_share15 = E_LI_payout15/(tot_wlth15)

*** REGRESSION ANALYSIS ***

qui {
	* controlling for age, sex, partner and kids *
	eststo nocont1: reg LI_share exlo80 i.sex i.age#i.sex i.scchd has_partner i.sex_p i.age_p#i.sex_p if inrange(age,50,69) & LI_share !=. , vce(cluster idauniq)
	eststo nocont2: reg LI_share exlo80 i.sex i.age#i.sex i.scchd has_partner i.sex_p i.age_p#i.sex_p if inrange(age,50,69) &LI_share >0 & LI_share != . , vce(cluster idauniq)
	* controlling for age, sex, partner, kids and total wealth *
	eststo cont1: reg LI_share exlo80 i.sex i.age#i.sex i.scchd has_partner i.sex_p i.age_p#i.sex_p tot_wlth if inrange(age,50,69) , vce(cluster idauniq)
	eststo cont2: reg LI_share exlo80 i.sex i.age#i.sex i.scchd has_partner i.sex_p i.age_p#i.sex_p tot_wlth if inrange(age,50,69) & LI_share >0 & LI_share != . , vce(cluster idauniq)
	}
	
* Only the coefficient on exlo80 is reported, with standard errors.
local keep_coeffs "exlo80"
esttab nocont1 cont1 nocont2 cont2  using "$output\Table4.tex",  keep(`keep_coeffs') se  replace starlevels(* 0.1 ** 0.05 *** 0.01)
